full.model <- lme(GPA ~ 1 + Cluster_current + Sex + Age1stround + SATMath + SATVerbal + SATWriting + Fager4_binary + FH_binary + STAI_SELF_Total + BDI_SELF_Total + Parental_SES + Sex*Cluster_current + Semester*Cluster_current + Sex*Semester + Semester, random = ~1 | BARCS_ID, data = data.file.long, na.action = na.exclude, method = "ML") 

full.model.reduced.interactions <- lme(GPA ~ 1 + Cluster_current + Sex + Age1stround + SATMath + SATVerbal + SATWriting + Fager4_binary + FH_binary + STAI_SELF_Total + BDI_SELF_Total + Parental_SES  + Semester*Cluster_current + Semester, random = ~ 1 | BARCS_ID,data = data.file.long, na.action = na.exclude, method = "ML")

small.model <- lme(GPA ~ Cluster_current * Semester, random = ~ 1 | BARCS_ID, data = data.file.long, na.action = na.exclude, method = "ML")

pseudo.trajectory <- lme(GPA ~ 1 + Fager4_binary + FH_binary + Sex + Cluster_SEM1 + Semester + Age1stround + SATMath + SATVerbal + SATWriting + STAI_SELF_Total + BDI_SELF_Total + Parental_SES + Cluster_SEM1 * Semester + Group_transition1 * Semester, random = ~ 1 | BARCS_ID, data = data.file.long, method = "REML", na.action = na.exclude)

fm.time.slope <- lme(GPA ~ 1 + Cluster_current + Sex + Age1stround + SATMath + SATVerbal + SATWriting + Fager4_binary + FH_binary + STAI_SELF_Total + BDI_SELF_Total + Parental_SES + Sex*Cluster_current + Time*Cluster_current + Sex*Time + Time , random = ~ 1 + Time| BARCS_ID, data = data.file.long, na.action = na.exclude, method = "ML") 
pbkrtest::KRmodcomp(full.model.lmer, full.model.lmer.reduced.interactions) ## only allows lmer -.-
## large : GPA ~ Cluster_current + Sex + Age1stround + SATMath + SATVerbal + 
##     SATWriting + Fager4_binary + FH_binary + STAI_SELF_Total + 
##     BDI_SELF_Total + Parental_SES + Semester + (1 | BARCS_ID) + 
##     Cluster_current:Sex + Cluster_current:Semester + Sex:Semester
## small : GPA ~ 1 + Cluster_current + Sex + Age1stround + SATMath + SATVerbal + 
##     SATWriting + Fager4_binary + FH_binary + STAI_SELF_Total + 
##     BDI_SELF_Total + Parental_SES + Semester * Cluster_current + 
##     Semester + (1 | BARCS_ID)
##            stat       ndf       ddf F.scaling p.value
## Ftest    0.5628    5.0000 2945.1811   0.99993  0.7286
anova(full.model.reduced.interactions, full.model)
##                                 Model df      AIC      BIC    logLik   Test
## full.model.reduced.interactions     1 24 5470.785 5617.665 -2711.393       
## full.model                          2 29 5477.951 5655.431 -2709.976 1 vs 2
##                                  L.Ratio p-value
## full.model.reduced.interactions                 
## full.model                      2.833905  0.7256
full.model.AR1 <- lme(GPA ~ 1 + Cluster_current + Sex + Age1stround + SATMath + SATVerbal + SATWriting + Fager4_binary + FH_binary + STAI_SELF_Total + BDI_SELF_Total + Parental_SES  + Semester*Cluster_current + Semester,
                     random = ~ 1 | BARCS_ID, correlation = corAR1(form = ~ Time| BARCS_ID), 
                     data = data.file.long, na.action = na.exclude, method = "ML" )

full.model.Unstructured <- lme(GPA ~ 1 + Cluster_current + Sex + Age1stround + SATMath + SATVerbal + SATWriting + Fager4_binary + FH_binary + STAI_SELF_Total + BDI_SELF_Total + Parental_SES  + Semester*Cluster_current + Semester,
                       random = ~ 1 | BARCS_ID, correlation = corSymm(form = ~ Time| BARCS_ID), 
                     data = data.file.long, na.action = na.exclude, method = "ML"  )

full.model.CompSymm <- lme(GPA ~ 1 + Cluster_current + Sex + Age1stround + SATMath + SATVerbal + SATWriting + Fager4_binary + FH_binary + STAI_SELF_Total + BDI_SELF_Total + Parental_SES  + Semester*Cluster_current + Semester,
                       random = ~ 1 | BARCS_ID, correlation = corCompSymm(form = ~ Time| BARCS_ID), 
                     data = data.file.long, na.action = na.exclude, method = "ML"  )

full.model.Toelpitz <- lme(GPA ~ 1 + Cluster_current + Sex + Age1stround + SATMath + SATVerbal + SATWriting + Fager4_binary + FH_binary + STAI_SELF_Total + BDI_SELF_Total + Parental_SES  + Semester*Cluster_current + Semester,
                       random = ~ 1 | BARCS_ID, correlation = corARMA(p = 0, q = 3, form = ~ Time| BARCS_ID),
                     data = data.file.long, na.action = na.exclude, method = "ML"  )

full.model.Exponential <- lme(GPA ~ 1 + Cluster_current + Sex + Age1stround + SATMath + SATVerbal + SATWriting + Fager4_binary + FH_binary + STAI_SELF_Total + BDI_SELF_Total + Parental_SES  + Semester*Cluster_current + Semester,
                       random = ~ 1 | BARCS_ID, correlation = corExp(form = ~ Time| BARCS_ID), 
                     data = data.file.long, na.action = na.exclude, method = "ML"  )

full.model.gaussian <- lme(GPA ~ 1 + Cluster_current + Sex + Age1stround + SATMath + SATVerbal + SATWriting + Fager4_binary + FH_binary + STAI_SELF_Total + BDI_SELF_Total + Parental_SES  + Semester*Cluster_current + Semester,
                       random = ~ 1 | BARCS_ID, correlation = corGaus(form = ~ Time| BARCS_ID), 
                     data = data.file.long, na.action = na.exclude, method = "ML"  )

full.model.MA1 <- lme(GPA ~ 1 + Cluster_current + Sex + Age1stround + SATMath + SATVerbal + SATWriting + Fager4_binary + FH_binary + STAI_SELF_Total + BDI_SELF_Total + Parental_SES  + Semester*Cluster_current + Semester,
                       random = ~ 1 | BARCS_ID, correlation = corARMA(q = 1, form = ~ Time| BARCS_ID), 
                     data = data.file.long, na.action = na.exclude, method = "ML" )

full.model.ARMA11 <- lme(GPA ~ 1 + Cluster_current + Sex + Age1stround + SATMath + SATVerbal + SATWriting + Fager4_binary + FH_binary + STAI_SELF_Total + BDI_SELF_Total + Parental_SES  + Semester*Cluster_current + Semester,
                       random = ~ 1 | BARCS_ID, correlation = corARMA(p = 1, q = 1, form = ~ Time| BARCS_ID), 
                     data = data.file.long, na.action = na.exclude , method = "ML"  )

full.model.ARMA12 <- lme(GPA ~ 1 + Cluster_current + Sex + Age1stround + SATMath + SATVerbal + SATWriting + Fager4_binary + FH_binary + STAI_SELF_Total + BDI_SELF_Total + Parental_SES  + Semester*Cluster_current + Semester,
                       random = ~ 1 | BARCS_ID, correlation = corARMA(p = 1, q = 2, form = ~ Time| BARCS_ID), 
                     data = data.file.long, na.action = na.exclude, method = "ML"   )

full.model.ARMA21 <- lme(GPA ~ 1 + Cluster_current + Sex + Age1stround + SATMath + SATVerbal + SATWriting + Fager4_binary + FH_binary + STAI_SELF_Total + BDI_SELF_Total + Parental_SES  + Semester*Cluster_current + Semester,
                       random = ~ 1 | BARCS_ID, correlation = corARMA(p = 2, q = 1, form = ~ Time| BARCS_ID), 
                     data = data.file.long, na.action = na.exclude , method = "ML" )

full.model.ARMA22 <- lme(GPA ~ 1 + Cluster_current + Sex + Age1stround + SATMath + SATVerbal + SATWriting + Fager4_binary + FH_binary + STAI_SELF_Total + BDI_SELF_Total + Parental_SES  + Semester*Cluster_current + Semester,
                       random = ~ 1 | BARCS_ID, correlation = corARMA(p = 2, q = 2, form = ~ Time| BARCS_ID), 
                     data = data.file.long, na.action = na.exclude , method = "ML")

AIC_values <- AIC(full.model.AR1, full.model.Unstructured, full.model.CompSymm, full.model.Toelpitz, full.model.Exponential, full.model.gaussian, full.model.MA1, full.model.ARMA11, full.model.ARMA12, full.model.ARMA21, full.model.ARMA22)
BIC_values <- BIC(full.model.AR1, full.model.Unstructured, full.model.CompSymm, full.model.Toelpitz, full.model.Exponential, full.model.gaussian, full.model.MA1, full.model.ARMA11, full.model.ARMA12, full.model.ARMA21, full.model.ARMA22)
df_scores_Cor <- data.frame(AIC_values, BIC_values)
df_scores_Cor[,-3]
##                         df      AIC      BIC
## full.model.AR1          25 5417.072 5570.072
## full.model.Unstructured 30 5411.590 5595.190
## full.model.CompSymm     25 5472.785 5625.785
## full.model.Toelpitz     27 5411.958 5577.198
## full.model.Exponential  25 5417.072 5570.072
## full.model.gaussian     25 5424.321 5577.321
## full.model.MA1          25 5424.587 5577.586
## full.model.ARMA11       26 5411.011 5570.130
## full.model.ARMA12       27 5411.958 5577.198
## full.model.ARMA21       27 5411.958 5577.198
## full.model.ARMA22       28 5413.958 5585.318
residuals.AR1 <- plot(full.model.AR1, main = "AR1")
residuals.Unstructured <- plot(full.model.Unstructured, main = "Unstructured")
residuals.CompSymm <- plot(full.model.CompSymm, main = "Comp Symmetry")
residuals.Toelpitz <- plot(full.model.Toelpitz, main = "Toelpitz")
residuals.Exponential <- plot(full.model.Exponential, main = "Exponential") 
residuals.Gaussian <- plot(full.model.gaussian, main = "Gaussian")
residuals.MA1 <- plot(full.model.MA1, main = "MA1")
residuals.ARMA11 <- plot(full.model.ARMA11, main = "ARMA11")
residuals.ARMA12 <- plot(full.model.ARMA12, main = "ARMA12")
residuals.ARMA21 <- plot(full.model.ARMA21, main = "ARMA21")
residuals.ARMA22 <- plot(full.model.ARMA22, main = "ARMA22")

cowplot::plot_grid(residuals.AR1, residuals.Toelpitz, residuals.ARMA11, residuals.ARMA12, nrow = 2)

cowplot::plot_grid(residuals.Unstructured, residuals.CompSymm, residuals.Exponential, residuals.Gaussian)

There seems to be a flat optimization plane. AIC –> ARMA (1,1), BIC –> AR(1)

choosing the ARMA (1,1) structure, due to the paper deciding on the AIC.

small.model.ARMA11 <- lme(GPA ~ Cluster_current * Semester, random = ~ 1 | BARCS_ID, data = data.file.long, na.action = na.exclude, method = "ML",  correlation = corARMA(p = 1, q = 1, form = ~ Time| BARCS_ID))

pseudo.trajectory.ARMA11 <- lme(GPA ~ 1 + Fager4_binary + FH_binary + Sex + Cluster_SEM1 + Semester + Age1stround + SATMath + SATVerbal + SATWriting + STAI_SELF_Total + BDI_SELF_Total + Parental_SES + Cluster_SEM1 * Semester + Group_transition1 * Semester, random = ~ 1 | BARCS_ID, data = data.file.long, method = "ML", na.action = na.exclude, correlation = corARMA(p = 1, q = 1, form = ~ Time| BARCS_ID))

time.slope.ARMA11 <- lme(GPA ~ 1 + Cluster_current + Sex + Age1stround + SATMath + SATVerbal + SATWriting + Fager4_binary + FH_binary + STAI_SELF_Total + BDI_SELF_Total + Parental_SES + Sex*Cluster_current + Time*Cluster_current + Sex*Time + Time , random = ~ 1 + Time| BARCS_ID, data = data.file.long, na.action = na.exclude, method = "ML", correlation = corARMA(p = 1, q = 1, form = ~ Time| BARCS_ID),control = list(maxIter = 1000, tolerance = 1e-3, opt = "optim")) 

small.model.AR1 <- lme(GPA ~ Cluster_current * Semester, random = ~ 1 | BARCS_ID, data = data.file.long, na.action = na.exclude, method = "ML", correlation = corAR1(form = ~ Time| BARCS_ID))

pseudo.trajectory.AR1 <- lme(GPA ~ 1 + Fager4_binary + FH_binary + Sex + Cluster_SEM1 + Semester + Age1stround + SATMath + SATVerbal + SATWriting + STAI_SELF_Total + BDI_SELF_Total + Parental_SES + Cluster_SEM1 * Semester + Group_transition1 * Semester, random = ~ 1 | BARCS_ID, data = data.file.long, method = "ML", na.action = na.exclude, correlation = corAR1(form = ~ Time| BARCS_ID))

time.slope.AR1 <- lme(GPA ~ 1 + Cluster_current + Sex + Age1stround + SATMath + SATVerbal + SATWriting + Fager4_binary + FH_binary + STAI_SELF_Total + BDI_SELF_Total + Parental_SES + Sex*Cluster_current + Time*Cluster_current + Sex*Time + Time , random = ~ 1 + Time| BARCS_ID, data = data.file.long, na.action = na.exclude, method = "ML", correlation = corAR1(form = ~ Time| BARCS_ID)) 

AIC_scores <- AIC(full.model.ARMA11, full.model.AR1, small.model.ARMA11, small.model.AR1, pseudo.trajectory.ARMA11, pseudo.trajectory.AR1, time.slope.ARMA11, time.slope.AR1)
## Warning in AIC.default(full.model.ARMA11, full.model.AR1, small.model.ARMA11, :
## models are not all fitted to the same number of observations
BIC_scores <- BIC(full.model.ARMA11, full.model.AR1, small.model.ARMA11, small.model.AR1, pseudo.trajectory.ARMA11, pseudo.trajectory.AR1, time.slope.ARMA11, time.slope.AR1)
## Warning in BIC.default(full.model.ARMA11, full.model.AR1, small.model.ARMA11, :
## models are not all fitted to the same number of observations
df_scores <- data.frame(AIC_scores, BIC_scores)
df_scores[,-3]
##                          df      AIC      BIC
## full.model.ARMA11        26 5411.011 5570.130
## full.model.AR1           25 5417.072 5570.072
## small.model.ARMA11       16 6385.916 6485.809
## small.model.AR1          15 6395.358 6489.007
## pseudo.trajectory.ARMA11 34 5962.190 6173.629
## pseudo.trajectory.AR1    33 5962.996 6168.216
## time.slope.ARMA11        25 5419.128 5572.128
## time.slope.AR1           24 5419.714 5566.594

the first one with manually computed significance with the SIdak correction, the second regular Anova output. The extra commands dont seem to do anything really unfortunately

print(as.data.frame(anova.pt))
##                            numDF denDF     Fvalue       pvalue significant
## (Intercept)                    1  2695 33932.3480 0.000000e+00         ***
## Fager4_binary                  1   985    25.0115 6.744727e-07         ***
## FH_binary                      1   985     6.4890 1.100532e-02            
## Sex                            1   985    24.7649 7.641016e-07         ***
## Cluster_SEM1                   2   985    19.4252 5.319448e-09         ***
## Semester                       3  2695     4.6134 3.179569e-03           *
## Age1stround                    1   985     4.4715 3.471572e-02            
## SATMath                        1   985   178.9563 0.000000e+00         ***
## SATVerbal                      1   985    24.8150 7.449722e-07         ***
## SATWriting                     1   985    19.4518 1.145224e-05         ***
## STAI_SELF_Total                1   985     0.3131 5.759329e-01            
## BDI_SELF_Total                 1   985     4.3129 3.808356e-02            
## Parental_SES                   1   985     4.3523 3.721633e-02            
## Group_transition1              2   985     4.0941 1.695495e-02            
## Cluster_SEM1:Semester          6  2695     1.1905 3.081967e-01            
## Semester:Group_transition1     6  2695     0.7936 5.748391e-01
Anova(pseudo.trajectory.ARMA11, test.statistic = "F",  robust= "hc3", correction = "Sidak")
## Warning in printHypothesis(L, rhs, names(b)): one or more coefficients in the hypothesis include
##      arithmetic operators in their names;
##   the printed representation of the hypothesis will be omitted

## Warning in printHypothesis(L, rhs, names(b)): one or more coefficients in the hypothesis include
##      arithmetic operators in their names;
##   the printed representation of the hypothesis will be omitted

## Warning in printHypothesis(L, rhs, names(b)): one or more coefficients in the hypothesis include
##      arithmetic operators in their names;
##   the printed representation of the hypothesis will be omitted
## Analysis of Deviance Table (Type II tests)
## 
## Response: GPA
##                              Chisq Df Pr(>Chisq)    
## Fager4_binary               7.1226  1   0.007612 ** 
## FH_binary                   1.4046  1   0.235954    
## Sex                        21.3295  1  3.867e-06 ***
## Cluster_SEM1               47.1070  2  5.900e-11 ***
## Semester                   14.0422  3   0.002848 ** 
## Age1stround                 4.7450  1   0.029383 *  
## SATMath                    28.9591  1  7.392e-08 ***
## SATVerbal                   1.6565  1   0.198082    
## SATWriting                 20.2297  1  6.868e-06 ***
## STAI_SELF_Total             0.8879  1   0.346035    
## BDI_SELF_Total              4.2848  1   0.038455 *  
## Parental_SES                3.9228  1   0.047635 *  
## Group_transition1           8.2109  2   0.016483 *  
## Cluster_SEM1:Semester       4.7318  6   0.578640    
## Semester:Group_transition1  4.8003  6   0.569672    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Warning in printHypothesis(L, rhs, names(b)): one or more coefficients in the hypothesis include
##      arithmetic operators in their names;
##   the printed representation of the hypothesis will be omitted

## Warning in printHypothesis(L, rhs, names(b)): one or more coefficients in the hypothesis include
##      arithmetic operators in their names;
##   the printed representation of the hypothesis will be omitted

## Warning in printHypothesis(L, rhs, names(b)): one or more coefficients in the hypothesis include
##      arithmetic operators in their names;
##   the printed representation of the hypothesis will be omitted
## Analysis of Deviance Table (Type II Wald F tests with Kenward-Roger df)
## 
## Response: GPA
##                                  F Df  Df.res    Pr(>F)    
## Fager4_binary               7.1522  1  993.14  0.007610 ** 
## FH_binary                   1.2379  1  990.71  0.266149    
## Sex                        20.4954  1  980.59 6.711e-06 ***
## Cluster_SEM1               22.9339  2  985.96 1.839e-10 ***
## Semester                    4.7585  3 2733.37  0.002595 ** 
## Age1stround                 4.5078  1 1025.82  0.033980 *  
## SATMath                    29.3119  1  973.12 7.763e-08 ***
## SATVerbal                   1.4649  1  975.50  0.226442    
## SATWriting                 20.5234  1  974.99 6.620e-06 ***
## STAI_SELF_Total             1.0099  1  980.92  0.315182    
## BDI_SELF_Total              3.8749  1  986.39  0.049294 *  
## Parental_SES                3.7321  1  976.97  0.053664 .  
## Group_transition1           3.9939  2  981.76  0.018727 *  
## Cluster_SEM1:Semester       0.9069  6 2739.90  0.488726    
## Semester:Group_transition1  0.9043  6 2734.68  0.490655    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

replicates the table with significances on page 9

anova(full.model.ARMA11, full.model.ARMA11.slope)
## Warning in anova.lme(full.model.ARMA11, full.model.ARMA11.slope): fitted
## objects with different fixed effects. REML comparisons are not meaningful.
##                         Model df      AIC      BIC    logLik   Test   L.Ratio
## full.model.ARMA11           1 26 5564.076 5723.025 -2756.038                 
## full.model.ARMA11.slope     2 28 5567.384 5738.560 -2755.692 1 vs 2 0.6920936
##                         p-value
## full.model.ARMA11              
## full.model.ARMA11.slope  0.7075

The performed Likelihood test with a pvalue of 0.7075 leads to a clear rejection of the inclusion of a time slope.

page 7

plot(effect("Semester*Cluster_current", full.model.ARMA11, robust= "hc3", correction = "Sidak"))

plot(effect("Cluster_current*Semester", small.model.ARMA11, robust= "hc3", correction = "Sidak"))

plot(effect("Cluster_current*Time", time.slope.ARMA11, robust= "hc3", correction = "Sidak"))

plot(effect("Group_transition1", pseudo.trajectory.ARMA11, robust= "hc3", correction = "Sidak"))
## NOTE: Group_transition1 is not a high-order term in the model

plot(effect("Sex", full.model.ARMA11, robust= "hc3", correction = "Sidak"), main = "Effect of Gender")

plot(effect("Fager4_binary", full.model.ARMA11, robust= "hc3", correction = "Sidak"), main = "Effect of Smoking")

plot(fitted(full.model.ARMA11), resid(full.model.ARMA11), 
     xlab = "Fitted Values", 
     ylab = "Residuals",
     main = "Residuals vs Fitted Plot")
abline(h = 0, col = "red")

qqPlot(resid(full.model.ARMA11), 
       main = "Normal Q-Q Plot of Residuals")

## 48723 28452 
##  1415   391
sqrt_abs_resid <- sqrt(abs(resid(full.model.ARMA11)))
plot(fitted(full.model.ARMA11), sqrt_abs_resid, 
     xlab = "Fitted Values", 
     ylab = "Square Root of |Residuals|",
     main = "Scale-Location Plot")

hist(resid(full.model.ARMA11), 
     main = "Histogram of Residuals", 
     xlab = "Residuals", 
     breaks = 10, 
     col = "gray")

plot(ranef(full.model.ARMA11), 
       main = "Random Intercept")

dim(ranef(full.model.ARMA11))
## [1] 1004    1
print(range(ranef(full.model.ARMA11)))
## [1] -4.982805e-06  3.096283e-06
print(r.squaredGLMM(full.model.ARMA11)) ####TERRRIBLE!!!!!!!!!!!!
## Warning: 'r.squaredGLMM' now calculates a revised statistic. See the help page.
##            R2m       R2c
## [1,] 0.1778163 0.1778177
plot(fitted(small.model.ARMA11), resid(small.model.ARMA11), 
     xlab = "Fitted Values", 
     ylab = "Residuals",
     main = "Residuals vs Fitted Plot")
abline(h = 0, col = "red")

qqPlot(resid(small.model.ARMA11), 
       main = "Normal Q-Q Plot of Residuals")

## 48723 43403 
##  1415  1409
sqrt_abs_resid <- sqrt(abs(resid(small.model.ARMA11)))
plot(fitted(small.model.ARMA11), sqrt_abs_resid, 
     xlab = "Fitted Values", 
     ylab = "Square Root of |Residuals|",
     main = "Scale-Location Plot")

hist(resid(small.model.ARMA11), 
     main = "Histogram of Residuals", 
     xlab = "Residuals", 
     breaks = 10, 
     col = "gray")

plot(ranef(small.model.ARMA11), 
       main = "Random Intercept")

print(dim(ranef(small.model.ARMA11)))
## [1] 1140    1
print(range(ranef(small.model.ARMA11)))
## [1] -6.560243e-06  4.037665e-06
print(r.squaredGLMM(small.model.ARMA11)) ####TERRRIBLE!!!!!!!!!!!!
##             R2m        R2c
## [1,] 0.01678364 0.01678612
plot(fitted(pseudo.trajectory.ARMA11), resid(pseudo.trajectory.ARMA11), 
     xlab = "Fitted Values", 
     ylab = "Residuals",
     main = "Residuals vs Fitted Plot")
abline(h = 0, col = "red")

qqPlot(resid(pseudo.trajectory.ARMA11), 
       main = "Normal Q-Q Plot of Residuals")

## 28452 48723 
##   391  1415
sqrt_abs_resid <- sqrt(abs(resid(pseudo.trajectory.ARMA11)))
plot(fitted(pseudo.trajectory.ARMA11), sqrt_abs_resid, 
     xlab = "Fitted Values", 
     ylab = "Square Root of |Residuals|",
     main = "Scale-Location Plot")

hist(resid(pseudo.trajectory.ARMA11), 
     main = "Histogram of Residuals", 
     xlab = "Residuals", 
     breaks = 10, 
     col = "gray")

plot(ranef(pseudo.trajectory.ARMA11), 
       main = "Random Intercept")

dim(ranef(pseudo.trajectory.ARMA11))
## [1] 1000    1
print(range(ranef(pseudo.trajectory.ARMA11)))
## [1] -0.8528631  0.5650472
print(r.squaredGLMM(pseudo.trajectory.ARMA11)) ### somewhat better
##            R2m       R2c
## [1,] 0.1835358 0.4254371

the pseudo trajectory is doing a lot better here.

full.model.gls <- gls(GPA ~ 1 + Sex + Age1stround + SATMath + SATVerbal + SATWriting + Fager4_binary + FH_binary + STAI_SELF_Total + BDI_SELF_Total + Parental_SES  + Semester * Cluster_current, data = data.file.long, na.action = na.exclude, method = "REML", correlation = corARMA(p = 1, q = 1, form = ~ Time| BARCS_ID))
anova(full.model.gls, full.model.ARMA11)
##                   Model df      AIC      BIC    logLik   Test      L.Ratio
## full.model.gls        1 25 5562.076 5714.911 -2756.038                    
## full.model.ARMA11     2 26 5564.076 5723.025 -2756.038 1 vs 2 3.089681e-06
##                   p-value
## full.model.gls           
## full.model.ARMA11  0.9986
small.model.gls <- gls(GPA ~ Semester * Cluster_current, data = data.file.long, na.action = na.exclude, method = "REML", correlation = corARMA(p = 1, q = 1, form = ~ Time| BARCS_ID))
anova(small.model.ARMA11, small.model.gls)
## Warning in anova.lme(small.model.ARMA11, small.model.gls): fitted objects with
## different fixed effects. REML comparisons are not meaningful.
##                    Model df      AIC      BIC    logLik   Test      L.Ratio
## small.model.ARMA11     1 16 6447.394 6547.236 -3207.697                    
## small.model.gls        2 15 6445.394 6538.996 -3207.697 1 vs 2 4.999507e-06
##                    p-value
## small.model.ARMA11        
## small.model.gls     0.9982
pseudo.trajectory.gls <- gls(GPA ~ 1 + Fager4_binary + FH_binary + Sex + Cluster_SEM1 + Semester + Age1stround + SATMath + SATVerbal + SATWriting + STAI_SELF_Total + BDI_SELF_Total + Parental_SES + Cluster_SEM1 * Semester + Group_transition1 * Semester, data = data.file.long, na.action = na.exclude, correlation = corARMA(p = 1, q = 1, form = ~ Time| BARCS_ID))
anova(pseudo.trajectory.ARMA11, pseudo.trajectory.gls)
##                          Model df      AIC      BIC    logLik   Test   L.Ratio
## pseudo.trajectory.ARMA11     1 34 6147.618 6358.781 -3039.809                 
## pseudo.trajectory.gls        2 33 6145.947 6350.899 -3039.974 1 vs 2 0.3287907
##                          p-value
## pseudo.trajectory.ARMA11        
## pseudo.trajectory.gls     0.5664

RIP lol. PT isn’t as clear probably due to the fact that the Random effects actually do something

vif(full.model.gls)
##                               GVIF Df GVIF^(1/(2*Df))
## Sex                       1.145947  1        1.070489
## Age1stround               1.036041  1        1.017861
## SATMath                   1.910025  1        1.382037
## SATVerbal                 2.729602  1        1.652151
## SATWriting                3.252019  1        1.803336
## Fager4_binary             1.048463  1        1.023945
## FH_binary                 1.039356  1        1.019488
## STAI_SELF_Total           1.947916  1        1.395678
## BDI_SELF_Total            1.993410  1        1.411882
## Parental_SES              1.081658  1        1.040028
## Semester                 11.867058  3        1.510279
## Cluster_current           4.657988  2        1.469094
## Semester:Cluster_current 37.907462  6        1.353818
vif(pseudo.trajectory.gls)
##                                 GVIF Df GVIF^(1/(2*Df))
## Fager4_binary               1.078203  1        1.038366
## FH_binary                   1.041129  1        1.020357
## Sex                         1.155155  1        1.074782
## Cluster_SEM1                2.846460  2        1.298902
## Semester                   18.208709  3        1.621984
## Age1stround                 1.041380  1        1.020480
## SATMath                     1.908960  1        1.381651
## SATVerbal                   2.749887  1        1.658278
## SATWriting                  3.246544  1        1.801817
## STAI_SELF_Total             1.954151  1        1.397909
## BDI_SELF_Total              2.003921  1        1.415599
## Parental_SES                1.103742  1        1.050591
## Group_transition1           2.702576  2        1.282167
## Cluster_SEM1:Semester      33.305212  6        1.339294
## Semester:Group_transition1  8.317589  6        1.193071

Semester + Interaction have high / very high multicollinearity, the other fixed effects seem okay. For the pt, the Semester and Cluster categorization in Semester are highly multicollinear, as well as the group transition variable having a high collinear value. This makes of course sense as they are related.

(ACF.pt <- ACF(pseudo.trajectory.ARMA11, maxLag=4))
##   lag         ACF
## 1   0  1.00000000
## 2   1  0.19978412
## 3   2  0.02089272
## 4   3 -0.09718593
## 5   4         NaN
plot(ACF.pt, alpha=0.01) 

#stat.ethz.ch/R-manual/R-devel/library/nlme/html/ACF.lme.html

expected. There is an ARMA(1,1) Cov structure in the model assumption, a lag of 1 should be showcassed here and it is.

shapiro.test(resid(full.model.ARMA11))
## 
##  Shapiro-Wilk normality test
## 
## data:  resid(full.model.ARMA11)
## W = 0.95119, p-value < 2.2e-16
shapiro.test(resid(small.model.ARMA11))
## 
##  Shapiro-Wilk normality test
## 
## data:  resid(small.model.ARMA11)
## W = 0.948, p-value < 2.2e-16
shapiro.test(resid(pseudo.trajectory.ARMA11))
## 
##  Shapiro-Wilk normality test
## 
## data:  resid(pseudo.trajectory.ARMA11)
## W = 0.94165, p-value < 2.2e-16
shapiro.test(resid(time.slope.ARMA11)) ## additional tests are somewhat trivial
## 
##  Shapiro-Wilk normality test
## 
## data:  resid(time.slope.ARMA11)
## W = 0.94257, p-value < 2.2e-16

strong evidence that the data is not normally distributed